LPPL model 

# 0. Setup and packages
# -------------------------
if(!requireNamespace("readxl", quietly = TRUE)) install.packages("readxl")
if(!requireNamespace("minpack.lm", quietly = TRUE)) install.packages("minpack.lm")
if(!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
if(!requireNamespace("gridExtra", quietly = TRUE)) install.packages("gridExtra")
if(!requireNamespace("zoo", quietly = TRUE)) install.packages("zoo")
if(!requireNamespace("scales", quietly = TRUE)) install.packages("scales")

library(readxl)
library(minpack.lm)
library(ggplot2)
library(gridExtra)
library(zoo)
library(scales)



# Parse date robustly
## Insert data and name it raw
raw$date <- as.Date(raw$date)
if(any(is.na(raw$date))){
  # try common formats
  raw$date <- as.Date(as.character(raw$date), tryFormats = c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y"))
}
# Ensure numeric price
raw$price <- as.numeric(raw$price)

# Remove rows with missing essential data and sort
raw <- raw[!is.na(raw$date) & !is.na(raw$price), ]
raw <- raw[order(raw$date), ]
n <- nrow(raw)
cat("Observations n =", n, "\n")
# -------------------------
# 2. Time index and log-price
# -------------------------
# Use days since first observation so tc maps to calendar dates
t_days <- as.numeric(difftime(raw$date, raw$date[1], units = "days"))  # starts at 0
t <- t_days + 1   # shift so first obs = 1
logp <- log(raw$price)

# Quick plot of raw series
p1 <- ggplot(raw, aes(x = date, y = price)) +
  geom_line() + geom_point(size = 0.6) +
  labs(title = "Silver price", x = "Date", y = "Price (in USD)") +
  theme_minimal()
print(p1)
# -------------------------
# 3. LPPL model definition
# -------------------------
lppl_fun <- function(par, t){
  A <- par["A"]; B <- par["B"]; C <- par["C"]
  m <- par["m"]; tc <- par["tc"]; omega <- par["omega"]; phi <- par["phi"]
  # ensure tc - t positive inside domain; caller must ensure tc > max(t)
  A + B * (tc - t)^m * (1 + C * cos(omega * log(tc - t) + phi))
}

# Residual function for nlsLM (observed - model)
resid_fn <- function(par) {
  as.numeric(logp - lppl_fun(par, t))
}

# -------------------------
# 4. Initial guesses and bounds
# -------------------------
# Automatic initial guesses based on data
A_init <- mean(logp)
B_init <- -0.5 * (max(logp) - min(logp)) / ((max(t) - min(t))^0.5)  # heuristic negative
C_init <- 0.1
m_init <- 0.5
tc_init <- max(t) + round(0.05 * (max(t) - min(t))) + 10  # a bit beyond last obs
omega_init <- 8
phi_init <- 0

init <- c(A = A_init, B = B_init, C = C_init, m = m_init, tc = tc_init, omega = omega_init, phi = phi_init)

# Bounds: ensure m in (0,1), tc > last observation, omega positive, C moderate
lower <- c(A = -Inf, B = -Inf, C = -1, m = 0.01, tc = max(t) + 1, omega = 2, phi = -2*pi)
upper <- c(A = Inf, B = Inf, C = 1, m = 1, tc = max(t) + 365*3, omega = 50, phi = 2*pi)

cat("Initial parameters:\n"); print(init)

# -------------------------
# 5. Fit LPPL using nlsLM
# -------------------------
set.seed(123)
fit_try <- try(
  nls.lm(par = init, fn = resid_fn, lower = lower, upper = upper,
         control = nls.lm.control(maxiter = 500, ftol = 1e-10)),
  silent = TRUE
)

if(inherits(fit_try, "try-error")){
  stop("LPPL fit failed. Try different initial guesses or narrower bounds.")
}
fit <- fit_try
coef_est <- coef(fit)
print(coef_est)

# -------------------------
# 6. Fitted values residuals and diagnostics
# -------------------------
fitted_logp <- lppl_fun(coef_est, t)
resids <- as.numeric(logp - fitted_logp)

# R-squared
rss <- sum(resids^2)
tss <- sum((logp - mean(logp))^2)
R2 <- 1 - rss/tss

# AIC (approximate for least squares)
sigma2 <- rss / (length(logp) - length(coef_est))
logLik_approx <- -0.5 * length(logp) * (log(2*pi) + log(sigma2) + 1)
AIC_approx <- -2 * logLik_approx + 2 * length(coef_est)

cat(sprintf("R-squared: %.4f\n", R2))
cat(sprintf("Approx AIC: %.2f\n", AIC_approx))

# Parameter standard errors via Jacobian approximation
J <- fit$jac  # Jacobian matrix from nls.lm
# compute covariance matrix: cov = sigma2 * (J'J)^{-1}
cov_par <- try(solve(t(J) %*% J) * sigma2, silent = TRUE)
if(inherits(cov_par, "try-error")){
  se_par <- rep(NA, length(coef_est))
} else {
  se_par <- sqrt(diag(cov_par))
}
param_table <- data.frame(Estimate = coef_est, StdError = se_par, row.names = names(coef_est))
print(param_table)

# Residual diagnostics plots
p_resid_ts <- ggplot(data.frame(date = raw$date, res = resids), aes(x = date, y = res)) +
  geom_line() + geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Residuals over time", x = "Date", y = "Residual (log-price)") + theme_minimal()
# ensure you have enough residuals
if(length(resids) < 2) stop("Not enough residuals to compute ACF")

# choose lag_max no larger than length(resids)-1
lag_max <- min(50, length(resids) - 1)

# compute acf up to lag_max (acf returns lag 0..lag_max)
acf_obj <- acf(resids, plot = FALSE, lag.max = lag_max)
# remove lag 0 and build matching vectors
acf_vals <- as.numeric(acf_obj$acf)[-1]   # remove lag 0
lags <- 1:lag_max
# sanity check
stopifnot(length(acf_vals) == length(lags))
# plot
p_resid_acf <- ggplot(data.frame(lag = lags, acf = acf_vals),
                      aes(x = lag, y = acf)) +
  geom_col() +
  labs(title = "ACF of residuals", x = "Lag", y = "ACF") +
  theme_minimal()
print(p_resid_acf)

p_qq <- ggplot(data.frame(sample = resids), aes(sample = sample)) +
  stat_qq() + stat_qq_line() + labs(title = "QQ plot residuals") + theme_minimal()
grid.arrange(p_resid_ts, p_qq, ncol = 1)
# Statistical tests
lb_test <- try(stats::Box.test(resids, lag = 20, type = "Ljung-Box"), silent = TRUE)
shapiro_test <- try(shapiro.test(resids), silent = TRUE)
cat("Ljung-Box test:\n"); print(lb_test)
cat("Shapiro-Wilk test:\n"); print(shapiro_test)
bubble_flags <- list(
  m_in_01 = (coef_est["m"] > 0 && coef_est["m"] < 1),
  B_negative = (coef_est["B"] < 0),
  tc_future = (coef_est["tc"] > max(t)),
  C_moderate = (abs(coef_est["C"]) < 1)
)
print(bubble_flags)
cat("If most flags TRUE then LPPL bubble signature is present.\n")

# Convert numeric tc to calendar date (t is days since first obs +1)
tc_numeric <- coef_est["tc"]
tc_date <- raw$date[1] + as.difftime(tc_numeric - 1, units = "days")
cat("Estimated tc numeric:", tc_numeric, " -> date:", as.character(tc_date), "\n")

# -------------------------
# 8. Bootstrap for tc uncertainty and out-of-sample correction window
# -------------------------
set.seed(123)
nboot <- 500   # adjust for more precision (longer runtime)
tc_boot <- rep(NA, nboot)
params_boot <- matrix(NA, nrow = nboot, ncol = length(coef_est))
colnames(params_boot) <- names(coef_est)
# Use residual bootstrap: resample residuals and add to fitted to create synthetic series
for(i in 1:nboot){
  # sample residuals with replacement
  res_samp <- sample(resids, replace = TRUE)
  logp_boot <- fitted_logp + res_samp
  # define residual function for bootstrap
  resid_boot_fn <- function(par) as.numeric(logp_boot - lppl_fun(par, t))
  # try to refit using previous estimates as start
  fitb_try <- try(
    nls.lm(par = coef_est, fn = resid_boot_fn, lower = lower, upper = upper,
           control = nls.lm.control(maxiter = 300, ftol = 1e-8)),
    silent = TRUE
  )
  if(!inherits(fitb_try, "try-error")){
    p_est <- coef(fitb_try)
    tc_boot[i] <- p_est["tc"]
    params_boot[i, ] <- p_est
  } else {
    # leave NA if fit failed
    tc_boot[i] <- NA
  }
  if(i %% 50 == 0) cat("Bootstrap iteration", i, "completed\n")
}
# Clean bootstrap results
valid_idx <- which(!is.na(tc_boot))
tc_boot_valid <- tc_boot[valid_idx]
params_boot_valid <- params_boot[valid_idx, , drop = FALSE]
cat("Successful bootstrap fits:", length(tc_boot_valid), "out of", nboot, "\n")

# Summary statistics for tc
tc_quantiles <- quantile(tc_boot_valid, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE)
tc_dates <- raw$date[1] + as.difftime(tc_quantiles - 1, units = "days")
tc_summary <- data.frame(quantile = names(tc_quantiles), tc_numeric = as.numeric(tc_quantiles), tc_date = as.Date(tc_dates))
print(tc_summary)
# Plot bootstrap distribution of tc
df_tc <- data.frame(tc = tc_boot_valid)
p_tc_hist <- ggplot(df_tc, aes(x = tc)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  geom_vline(xintercept = coef_est["tc"], color = "red", linetype = "dashed") +
  scale_x_continuous(labels = function(x) as.character(raw$date[1] + as.difftime(x - 1, units = "days"))) +
  labs(title = "Bootstrap distribution of tc", x = "tc (calendar date)", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_tc_hist)

# -------------------------
# 9. Out-of-sample forecast to tc and correction window
# -------------------------
# Create a forecast horizon up to median tc + margin
tc_median <- median(tc_boot_valid, na.rm = TRUE)
horizon_days <- ceiling(max(0, tc_median - max(t)) + 30)  # 30 days beyond median tc
t_forecast <- seq(min(t), max(t) + horizon_days, by = 1)
fitted_full <- lppl_fun(coef_est, t_forecast)

# Convert to price scale
price_fit_full <- exp(fitted_full)

# Build forecast dataframe for plotting
date_forecast <- raw$date[1] + as.difftime(t_forecast - 1, units = "days")
df_forecast <- data.frame(date = date_forecast, price_fit = price_fit_full)
df_obs <- data.frame(date = raw$date, price = raw$price)
p_forecast <- ggplot() +
  geom_line(data = df_obs, aes(x = date, y = price), color = "black") +
  geom_line(data = df_forecast, aes(x = date, y = price_fit), color = "red") +
  geom_vline(xintercept = as.Date(tc_date), color = "red", linetype = "dashed") +
  labs(title = "LPPL fit and forecast to estimated tc", x = "Date", y = "Price") +
  theme_minimal()
print(p_forecast)

# Use bootstrap tc quantiles to indicate correction window
tc_low <- tc_summary$tc_date[tc_summary$quantile == "5%"]
tc_high <- tc_summary$tc_date[tc_summary$quantile == "95%"]
cat("90% bootstrap correction window from", as.character(tc_low), "to", as.character(tc_high), "\n")

# -------------------------
# 10. Output summary and save results
# -------------------------
results <- list(
  n = n,
  coef = coef_est,
  se = se_par,
  param_table = param_table,
  R2 = R2,
  AIC = AIC_approx,
  tc_numeric = tc_numeric,
  tc_date = as.Date(tc_date),
  tc_boot = tc_boot_valid,
  tc_summary = tc_summary,
  bubble_flags = bubble_flags
)
print("Parameter estimates")
print(results$param_table)
cat("Estimated tc date:", as.character(results$tc_date), "\n")
cat("Bootstrap median tc date:", as.character(raw$date[1] + as.difftime(median(tc_boot_valid, na.rm = TRUE) - 1, units = "days")), "\n")

# Optionally save results to RDS and plots to files
# Uncomment to save
# saveRDS(results, file = "lppl_results.rds")
# ggsave("lppl_fit_forecast.png", p_forecast, width = 10, height = 6)
# ggsave("lppl_tc_boot_hist.png", p_tc_hist, width = 8, height = 4)

# -------------------------
# End of script
# -------------------------

Block boostrap
# Modified block bootstrap that returns tc draws and parameter matrix
block_boot_full <- function(logp, fitted, t, resids,
                            init_par = coef_est,
                            lower = lower, upper = upper,
                            block_len = 10, nboot = 500,
                            maxiter = 200, verbose = TRUE) {
  n <- length(logp)
  nb <- floor(n / block_len)
  tc_boot <- rep(NA_real_, nboot)
  params_boot <- matrix(NA_real_, nrow = nboot, ncol = length(init_par))
  colnames(params_boot) <- names(init_par)

  for (b in seq_len(nboot)) {
    # sample non-overlapping blocks with replacement
    blocks <- sample(0:(nb - 1), nb, replace = TRUE)
    res_boot <- unlist(lapply(blocks, function(k) {
      start <- k * block_len + 1
      end <- min(start + block_len - 1, n)
      resids[start:end]
    }), use.names = FALSE)

    # trim or pad to length n
    if (length(res_boot) < n) {
      res_boot <- c(res_boot, sample(resids, n - length(res_boot), replace = TRUE))
    } else if (length(res_boot) > n) {
      res_boot <- res_boot[1:n]
    }

    # synthetic log-price
    logp_b <- fitted + res_boot

    # residual function for this bootstrap sample
    resid_boot_fn <- function(par) as.numeric(logp_b - lppl_fun(par, t))

    # try refit using previous estimate as start
    fitb_try <- try(
      nls.lm(par = init_par, fn = resid_boot_fn,
             lower = lower, upper = upper,
             control = nls.lm.control(maxiter = maxiter, ftol = 1e-8)),
      silent = TRUE
    )
    if (!inherits(fitb_try, "try-error")) {
      p_est <- coef(fitb_try)
      tc_boot[b] <- p_est["tc"]
      params_boot[b, ] <- p_est
    }

    if (verbose && (b %% 50 == 0)) {
      message("Bootstrap iteration ", b, " / ", nboot, " completed")
    }
  }

  list(tc = tc_boot, params = params_boot)
}


# Set parameters
block_len <- 10      # adjust block length to match residual autocorrelation
nboot <- 500         # increase if you want tighter sampling of bootstrap distribution
maxiter <- 300

# Run bootstrap (uses objects: logp, fitted_logp or fitted, t, resids, coef_est, lower, upper)
# Note: in your earlier code 'fitted' variable name was used; ensure it is fitted_logp or fitted
# Here I assume 'fitted' is the fitted log-price vector (fitted_logp)
boot_res <- block_boot_full(logp = logp,
                            fitted = fitted_logp,
                            t = t,
                            resids = resids,
                            init_par = coef_est,
                            lower = lower,
                            upper = upper,
                            block_len = block_len,
                            nboot = nboot,
                            maxiter = maxiter,
                            verbose = TRUE)

# Extract and clean
tc_all <- boot_res$tc
params_all <- boot_res$params
success_idx <- which(!is.na(tc_all))
tc_valid <- tc_all[success_idx]
params_valid <- params_all[success_idx, , drop = FALSE]
n_success <- length(tc_valid)
cat("Successful bootstrap fits:", n_success, "out of", nboot, "\n")

# Summary quantiles for tc (numeric)
tc_q <- quantile(tc_valid, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE)
tc_q

# Convert numeric tc to calendar dates (if t is days since raw$date[1] + 1)
tc_dates <- raw$date[1] + as.difftime(tc_valid - 1, units = "days")
tc_q_dates <- raw$date[1] + as.difftime(as.numeric(tc_q) - 1, units = "days")

tc_summary <- data.frame(
  quantile = names(tc_q),
  tc_numeric = as.numeric(tc_q),
  tc_date = as.Date(tc_q_dates)
)
print(tc_summary)

library(ggplot2)

# Build data frame for plotting
df_tc <- data.frame(tc_numeric = tc_valid,
                    tc_date = as.Date(raw$date[1] + as.difftime(tc_valid - 1, units = "days")))

# Histogram on numeric axis with date labels
p_tc_hist <- ggplot(df_tc, aes(x = tc_date)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  geom_vline(xintercept = as.Date(raw$date[1] + as.difftime(coef_est["tc"] - 1, units = "days")),
             color = "red", linetype = "dashed", size = 0.8) +
  labs(title = "Block bootstrap distribution of tc (calendar dates)",
       x = "tc (date)", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_tc_hist)

# Evaluate model at tc - eps for each successful bootstrap parameter set
eps <- 1  # days before tc to avoid numerical issues
peak_price_boot <- numeric(n_success)
for (i in seq_len(n_success)) {
  p <- params_valid[i, ]
  t_peak <- p["tc"] - eps
  # ensure t_peak > max(t) - if not, set to max(t)
  if (t_peak <= max(t)) t_peak <- max(t)
  logp_peak <- lppl_fun(p, t_peak)
  peak_price_boot[i] <- exp(logp_peak)
}

# Summary of peak price distribution
quantile(peak_price_boot, probs = c(0.05, 0.5, 0.95), na.rm = TRUE)

# 1) Success rate
cat("Bootstrap success rate:", round(100 * n_success / nboot, 1), "%\n")

# 2) Compare original tc to bootstrap median
tc_median_boot <- median(tc_valid, na.rm = TRUE)
tc_median_date <- raw$date[1] + as.difftime(tc_median_boot - 1, units = "days")
cat("Original tc numeric:", coef_est["tc"], "->", as.character(raw$date[1] + as.difftime(coef_est["tc"] - 1, units='days')), "\n")
cat("Bootstrap median tc numeric:", tc_median_boot, "->", as.character(tc_median_date), "\n")




Wilder
# -------------------------
# Wild bootstrap full script
# -------------------------
library(ggplot2)

# Wild bootstrap function that returns tc draws and full parameter matrix
wild_boot_full <- function(logp, fitted, resids, t,
                           init_par = coef_est,
                           lower = lower, upper = upper,
                           nboot = 500, eps = 1,
                           maxiter = 300, verbose = TRUE) {
  n <- length(logp)
  tc_boot <- rep(NA_real_, nboot)
  params_boot <- matrix(NA_real_, nrow = nboot, ncol = length(init_par))
  colnames(params_boot) <- names(init_par)

  for (i in seq_len(nboot)) {
    # Rademacher multipliers (-1 or +1)
    u <- sample(c(-1, 1), n, replace = TRUE)
    logp_b <- fitted + resids * u

    # residual function for bootstrap sample
    resid_boot_fn <- function(par) as.numeric(logp_b - lppl_fun(par, t))

    # try refit using previous estimate as start
    fitb_try <- try(
      nls.lm(par = init_par, fn = resid_boot_fn,
             lower = lower, upper = upper,
             control = nls.lm.control(maxiter = maxiter, ftol = 1e-8)),
      silent = TRUE
    )

    if (!inherits(fitb_try, "try-error")) {
      p_est <- coef(fitb_try)
      tc_boot[i] <- p_est["tc"]
      params_boot[i, ] <- p_est
    }

    if (verbose && (i %% 50 == 0)) message("Wild bootstrap iteration ", i, " / ", nboot, " completed")
  }

  list(tc = tc_boot, params = params_boot)
}

# Parameters for the bootstrap
nboot <- 500        # increase for more precision
eps <- 1            # days before tc to evaluate peak price
maxiter <- 300

# Run wild bootstrap
set.seed(123)
wild_res <- wild_boot_full(logp = logp,
                           fitted = fitted_logp,
                           resids = resids,
                           t = t,
                           init_par = coef_est,
                           lower = lower,
                           upper = upper,
                           nboot = nboot,
                           eps = eps,
                           maxiter = maxiter,
                           verbose = TRUE)

# Clean results
tc_all <- wild_res$tc
params_all <- wild_res$params
success_idx <- which(!is.na(tc_all))
tc_valid <- tc_all[success_idx]
params_valid <- params_all[success_idx, , drop = FALSE]
n_success <- length(tc_valid)
cat("Successful wild bootstrap fits:", n_success, "out of", nboot, "\n")

# Summary quantiles for tc
tc_q <- quantile(tc_valid, probs = c(0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975), na.rm = TRUE)
tc_q_dates <- raw$date[1] + as.difftime(as.numeric(tc_q) - 1, units = "days")
tc_summary <- data.frame(quantile = names(tc_q), tc_numeric = as.numeric(tc_q), tc_date = as.Date(tc_q_dates))
print(tc_summary)

# Plot bootstrap distribution of tc as calendar dates
df_tc <- data.frame(tc_numeric = tc_valid,
                    tc_date = as.Date(raw$date[1] + as.difftime(tc_valid - 1, units = "days")))

p_tc_hist <- ggplot(df_tc, aes(x = tc_date)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  geom_vline(xintercept = as.Date(raw$date[1] + as.difftime(coef_est["tc"] - 1, units = "days")),
             color = "red", linetype = "dashed", size = 0.8) +
  labs(title = "Wild bootstrap distribution of tc", x = "tc (date)", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p_tc_hist)

# Compute bootstrap distribution of peak price evaluated at tc - eps
peak_price_boot <- numeric(n_success)
for (j in seq_len(n_success)) {
  p <- params_valid[j, ]
  t_peak <- p["tc"] - eps
  # if t_peak <= max(t) set to max(t) to avoid evaluating inside sample incorrectly
  if (t_peak <= max(t)) t_peak <- max(t)
  logp_peak <- lppl_fun(p, t_peak)
  peak_price_boot[j] <- exp(logp_peak)
}

# Summarize peak price distribution
peak_q <- quantile(peak_price_boot, probs = c(0.025, 0.05, 0.5, 0.95, 0.975), na.rm = TRUE)
print(round(peak_q, 2))

# Plot peak price distribution
df_peak <- data.frame(peak_price = peak_price_boot)
p_peak_hist <- ggplot(df_peak, aes(x = peak_price)) +
  geom_histogram(bins = 40, fill = "tomato", color = "white") +
  geom_vline(xintercept = exp(lppl_fun(coef_est, coef_est["tc"] - eps)), color = "red", linetype = "dashed") +
  labs(title = "Bootstrap distribution of peak price (tc - eps)", x = "Price", y = "Count") +
  theme_minimal()
print(p_peak_hist)

# Diagnostics and reporting
cat("Bootstrap success rate:", round(100 * n_success / nboot, 1), "%\n")
cat("Original tc numeric:", coef_est["tc"], "->", as.character(raw$date[1] + as.difftime(coef_est["tc"] - 1, units = "days")), "\n")
cat("Wild bootstrap median tc numeric:", median(tc_valid, na.rm = TRUE),
    "->", as.character(raw$date[1] + as.difftime(median(tc_valid, na.rm = TRUE) - 1, units = "days")), "\n")

